# =============================================================================
# FILE 2. DATA PREP PIPELINE — POLITICIAN SURVEYS
# =============================================================================
# PURPOSE
#   Clean, harmonize, and export politician survey datasets (2023 & 2024) for downstream analysis.
#
# REPLICATION / RUN NOTES
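#   1) Install and load the required packages before running. NOTE: no "Section 0" is visible in
#      this file; the code below calls read_sav, read.dta, %>% / mutate / filter / recode,
#      replace_with_na and str_detect (presumably from haven, foreign, dplyr, naniar and stringr).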
#   2) Set the working directory in Section 1 to the folder that contains the input data files.
#   3) Ensure any prerequisite .do files referenced in comments have been run to generate .dta inputs.
#   4) Run this script top-to-bottom in a fresh R session.
#
# INPUT FILES (relative to working directory)
#   - Data_Politicians_2024.dta
#   - March 2023.sav
#   - pf_flanders_2023.dta
#
# OUTPUT FILES (written to working directory)
#   - pf_flanders_2023_data.dta
#   - pf_flanders_2023_data.rds
#   - pf_flanders_2024_data.dta
#   - pf_flanders_2024_data.rds
#   - pf_flanders_data.dta
#   - pf_flanders_data.rds
# =============================================================================

################################################################################
## 1. WORKING DIRECTORY + FILE LOCATIONS
################################################################################

# Show the directory the session is currently in (only prints when run interactively).
getwd()

# Set project directory (update it to your own directory that contains the input files).
# The path is checked first so that a forgotten placeholder or a mistyped folder stops
# the script with an actionable message instead of the generic setwd() error.
project_dir <- "updated_path_here"
if (!dir.exists(project_dir)) {
  stop(
    "Project directory '", project_dir, "' does not exist. ",
    "Edit `project_dir` in Section 1 so it points to the folder with the input files.",
    call. = FALSE
  )
}
setwd(project_dir)

################################################################################
## 2. POLITICIAN SURVEYS
################################################################################

################################################################################
## 2.1. LOAD 2023 DATA
################################################################################

# --- Data ingest: 2023 politician survey (SPSS) ---
# Expectation: March 2023.sav is present in the working directory.
pf.flanders.2023.data <- read_sav("March 2023.sav")

# Clean column names (2023): replace every literal dot with an underscore.
colnames(pf.flanders.2023.data) <- gsub("\\.", "_", colnames(pf.flanders.2023.data))

# Add ID column (2023): the ID is simply the row position in the .sav file.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(id = row_number())

# --- Auxiliary merge: year-of-birth / additional labels (2023) ---
# Rationale: The .do-derived .dta contains harmonized label variables used later in the pipeline.
# Run "pf_flanders_2023.do" first so that pf_flanders_2023.dta exists.
# Only the two label columns are kept; the ID is again the row position.
pf.flanders.2023.yob <- read.dta("pf_flanders_2023.dta") %>%
  dplyr::select(Q4_2_label, Q2_8_label) %>%
  dplyr::mutate(id = row_number())

# The merge key is positional, so the two files must hold the same respondents in the
# same order. A differing row count means the inner merge below silently drops rows
# and/or pairs labels with the wrong respondent, so make that visible.
if (nrow(pf.flanders.2023.yob) != nrow(pf.flanders.2023.data)) {
  warning(
    "Row counts differ between 'March 2023.sav' (", nrow(pf.flanders.2023.data),
    ") and 'pf_flanders_2023.dta' (", nrow(pf.flanders.2023.yob),
    "); the row-number merge may misalign or drop respondents.",
    call. = FALSE
  )
}

# Inner merge on the positional ID (rows present in both inputs only).
pf.flanders.2023.data <- merge(pf.flanders.2023.data, pf.flanders.2023.yob, by = "id", all = FALSE)

################################################################################
## 2.2. LOAD 2024 DATA
################################################################################

# Load data (2024)
# Reads the 2024 politician survey from the Stata file Data_Politicians_2024.dta,
# which must be present in the working directory.
# NOTE(review): read.dta is presumably foreign::read.dta, which by default turns
# value-labelled Stata variables into factors — confirm the numeric comparisons
# used later (e.g. Q2 == 1) see the coding they expect.
pf.flanders.2024.data <- read.dta("Data_Politicians_2024.dta")

################################################################################
## 2.3. DEFINE NEW VARIABLES
################################################################################

################################################################################
## 2.3.1. SEAT EXPECTATIONS
################################################################################

# Seat expectations (2023)
# One pipeline per survey wave:
#   1. copy items Q9_1_1 ... Q9_1_7 into party-named seats_* columns,
#   2. set the -99 code to NA,
#   3. convert the remaining codes to a factor (3 = "Fewer", 2 = "Same", 1 = "More").
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    seats_pvda    = Q9_1_1,
    seats_groen   = Q9_1_2,
    seats_vooruit = Q9_1_3,
    seats_cdv     = Q9_1_4,
    seats_ovld    = Q9_1_5,
    seats_nva     = Q9_1_6,
    seats_vb      = Q9_1_7
  ) %>%
  replace_with_na(
    replace = list(
      seats_pvda    = c(-99),
      seats_groen   = c(-99),
      seats_vooruit = c(-99),
      seats_cdv     = c(-99),
      seats_ovld    = c(-99),
      seats_nva     = c(-99),
      seats_vb      = c(-99)
    )
  ) %>%
  dplyr::mutate(
    dplyr::across(
      c(seats_pvda, seats_groen, seats_vooruit, seats_cdv,
        seats_ovld, seats_nva, seats_vb),
      ~ factor(.x, levels = c(3, 2, 1), labels = c("Fewer", "Same", "More"))
    )
  )

# Seat expectations (2024)
# Same three steps as for 2023, applied to the 2024 items
# (Q1_4, Q1_5, Q1_6, Q1_7, Q1_9, Q1_10, Q1_11):
#   1. copy the items into party-named seats_* columns,
#   2. set the -99 code to NA (2024),
#   3. convert the remaining codes to a factor (3 = "Fewer", 2 = "Same", 1 = "More").
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    seats_pvda    = Q1_4,
    seats_groen   = Q1_5,
    seats_vooruit = Q1_6,
    seats_cdv     = Q1_7,
    seats_ovld    = Q1_9,
    seats_nva     = Q1_10,
    seats_vb      = Q1_11
  ) %>%
  replace_with_na(
    replace = list(
      seats_pvda    = c(-99),
      seats_groen   = c(-99),
      seats_vooruit = c(-99),
      seats_cdv     = c(-99),
      seats_ovld    = c(-99),
      seats_nva     = c(-99),
      seats_vb      = c(-99)
    )
  ) %>%
  dplyr::mutate(
    dplyr::across(
      c(seats_pvda, seats_groen, seats_vooruit, seats_cdv,
        seats_ovld, seats_nva, seats_vb),
      ~ factor(.x, levels = c(3, 2, 1), labels = c("Fewer", "Same", "More"))
    )
  )

################################################################################
## 2.3.2. CORRECT SEAT EXPECTATIONS
################################################################################

# Correct seat expectations (2023)
# Each seats_*_correct indicator is 1 when the respondent picked the answer this
# script treats as correct for that party, 0 for either of the other two answers,
# and NA when the expectation itself is missing.
#   "More"  counts as correct for: PVDA, Vooruit, Vlaams Belang
#   "Fewer" counts as correct for: Groen, CD&V, Open Vld, N-VA
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    seats_pvda_correct    = ifelse(seats_pvda == "More", 1, 0),
    seats_groen_correct   = ifelse(seats_groen == "Fewer", 1, 0),
    seats_vooruit_correct = ifelse(seats_vooruit == "More", 1, 0),
    seats_cdv_correct     = ifelse(seats_cdv == "Fewer", 1, 0),
    seats_ovld_correct    = ifelse(seats_ovld == "Fewer", 1, 0),
    seats_nva_correct     = ifelse(seats_nva == "Fewer", 1, 0),
    seats_vb_correct      = ifelse(seats_vb == "More", 1, 0)
  )

# Correct seat forecast (2024)
# Each seats_*_correct indicator is 1 when the respondent picked the answer this
# script treats as correct for that party, 0 for either of the other two answers,
# and NA when the forecast itself is missing.
#   "More"  counts as correct for: PVDA, Vooruit, Vlaams Belang
#   "Fewer" counts as correct for: Groen, CD&V, Open Vld, N-VA
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    seats_pvda_correct    = ifelse(seats_pvda == "More", 1, 0),
    seats_groen_correct   = ifelse(seats_groen == "Fewer", 1, 0),
    seats_vooruit_correct = ifelse(seats_vooruit == "More", 1, 0),
    seats_cdv_correct     = ifelse(seats_cdv == "Fewer", 1, 0),
    seats_ovld_correct    = ifelse(seats_ovld == "Fewer", 1, 0),
    seats_nva_correct     = ifelse(seats_nva == "Fewer", 1, 0),
    seats_vb_correct      = ifelse(seats_vb == "More", 1, 0)
  )

################################################################################
## 2.3.3. SEAT FORECASTS ACCURACY
################################################################################

# Seat forecasts accuracy (2023)
# Number of correct seat forecasts per respondent: the seven seats_*_correct
# indicators are added up left to right with `+`, so a single missing indicator
# makes the total NA.
pf.flanders.2023.data$seats_accuracy <- Reduce(
  `+`,
  pf.flanders.2023.data[paste0(
    "seats_",
    c("pvda", "groen", "vooruit", "cdv", "ovld", "nva", "vb"),
    "_correct"
  )]
)

# Seat forecasts accuracy (2024)
# Same sum for the 2024 wave.
pf.flanders.2024.data$seats_accuracy <- Reduce(
  `+`,
  pf.flanders.2024.data[paste0(
    "seats_",
    c("pvda", "groen", "vooruit", "cdv", "ovld", "nva", "vb"),
    "_correct"
  )]
)

################################################################################
## 2.3.4. GOVERNMENT FORECASTS
################################################################################

# Government expectations (2023)
# One pipeline:
#   1. copy items Q9_2_1 ... Q9_2_7 into party-named govt_* columns,
#   2. set the -99 code to NA,
#   3. subtract 1 so the scale runs from 0 to 10.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    govt_pvda    = Q9_2_1,
    govt_groen   = Q9_2_2,
    govt_vooruit = Q9_2_3,
    govt_cdv     = Q9_2_4,
    govt_ovld    = Q9_2_5,
    govt_nva     = Q9_2_6,
    govt_vb      = Q9_2_7
  ) %>%
  replace_with_na(
    replace = list(
      govt_pvda    = c(-99),
      govt_groen   = c(-99),
      govt_vooruit = c(-99),
      govt_cdv     = c(-99),
      govt_ovld    = c(-99),
      govt_nva     = c(-99),
      govt_vb      = c(-99)
    )
  ) %>%
  dplyr::mutate(
    dplyr::across(
      c(govt_pvda, govt_groen, govt_vooruit, govt_cdv,
        govt_ovld, govt_nva, govt_vb),
      ~ (.x - 1)
    )
  )

# Government expectations (2024)
# One pipeline, using the 2024 items (Q2_4, Q2_5, Q2_6, Q2_7, Q2_9, Q2_10, Q2_11):
#   1. copy the items into party-named govt_* columns,
#   2. set the -99 code to NA,
#   3. subtract 1 so the scale runs from 0 to 10.
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    govt_pvda    = Q2_4,
    govt_groen   = Q2_5,
    govt_vooruit = Q2_6,
    govt_cdv     = Q2_7,
    govt_ovld    = Q2_9,
    govt_nva     = Q2_10,
    govt_vb      = Q2_11
  ) %>%
  replace_with_na(
    replace = list(
      govt_pvda    = c(-99),
      govt_groen   = c(-99),
      govt_vooruit = c(-99),
      govt_cdv     = c(-99),
      govt_ovld    = c(-99),
      govt_nva     = c(-99),
      govt_vb      = c(-99)
    )
  ) %>%
  dplyr::mutate(
    dplyr::across(
      c(govt_pvda, govt_groen, govt_vooruit, govt_cdv,
        govt_ovld, govt_nva, govt_vb),
      ~ (.x - 1)
    )
  )

################################################################################
## 2.3.5. ACTUAL SEAT OUTCOME
################################################################################

# Actual seat outcome (2023)
# Constant columns (same value for every respondent): 1 for PVDA, Vooruit and
# Vlaams Belang, 0 for Groen, CD&V, Open Vld and N-VA.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    seats_pvda_outcome    = 1,
    seats_groen_outcome   = 0,
    seats_vooruit_outcome = 1,
    seats_cdv_outcome     = 0,
    seats_ovld_outcome    = 0,
    seats_nva_outcome     = 0,
    seats_vb_outcome      = 1
  )

# Actual seat outcome (2024)
# The same constants are attached to the 2024 wave.
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    seats_pvda_outcome    = 1,
    seats_groen_outcome   = 0,
    seats_vooruit_outcome = 1,
    seats_cdv_outcome     = 0,
    seats_ovld_outcome    = 0,
    seats_nva_outcome     = 0,
    seats_vb_outcome      = 1
  )

################################################################################
## 2.3.6. ACTUAL COALITION OUTCOME
################################################################################

# Actual coalition outcome (2023)
# Constant columns (same value for every respondent): 1 for Vooruit, CD&V and
# N-VA, 0 for PVDA, Groen, Open Vld and Vlaams Belang.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    govt_pvda_outcome    = 0,
    govt_groen_outcome   = 0,
    govt_vooruit_outcome = 1,
    govt_cdv_outcome     = 1,
    govt_ovld_outcome    = 0,
    govt_nva_outcome     = 1,
    govt_vb_outcome      = 0
  )

# Actual coalition outcome (2024)
# The same constants are attached to the 2024 wave.
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    govt_pvda_outcome    = 0,
    govt_groen_outcome   = 0,
    govt_vooruit_outcome = 1,
    govt_cdv_outcome     = 1,
    govt_ovld_outcome    = 0,
    govt_nva_outcome     = 1,
    govt_vb_outcome      = 0
  )

################################################################################
## 2.3.7. BRIER SCORES FOR GOVERNMENT PROBABILITY
################################################################################

# Brier scores (2023)
# Per party: 1 - (p - outcome)^2, where p is the 0-10 government expectation divided
# by 10 and outcome is the matching govt_*_outcome column defined in Section 2.3.6.
# Reading the outcome from that column (instead of repeating 0/1 literals here) keeps
# a single source of truth for the coalition result; the values are unchanged.
# Higher values mean a forecast closer to the outcome. govt_brier is the sum over the
# seven parties and is NA as soon as one party score is NA.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    govt_pvda_brier    = 1 - ((govt_pvda / 10 - govt_pvda_outcome)^2),
    govt_groen_brier   = 1 - ((govt_groen / 10 - govt_groen_outcome)^2),
    govt_vooruit_brier = 1 - ((govt_vooruit / 10 - govt_vooruit_outcome)^2),
    govt_cdv_brier     = 1 - ((govt_cdv / 10 - govt_cdv_outcome)^2),
    govt_ovld_brier    = 1 - ((govt_ovld / 10 - govt_ovld_outcome)^2),
    govt_nva_brier     = 1 - ((govt_nva / 10 - govt_nva_outcome)^2),
    govt_vb_brier      = 1 - ((govt_vb / 10 - govt_vb_outcome)^2),
    govt_brier = govt_pvda_brier +
      govt_groen_brier +
      govt_vooruit_brier +
      govt_cdv_brier +
      govt_ovld_brier +
      govt_nva_brier +
      govt_vb_brier
  )

# Brier scores (2024)
# Same computation for the 2024 wave, again using its govt_*_outcome columns.
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    govt_pvda_brier    = 1 - ((govt_pvda / 10 - govt_pvda_outcome)^2),
    govt_groen_brier   = 1 - ((govt_groen / 10 - govt_groen_outcome)^2),
    govt_vooruit_brier = 1 - ((govt_vooruit / 10 - govt_vooruit_outcome)^2),
    govt_cdv_brier     = 1 - ((govt_cdv / 10 - govt_cdv_outcome)^2),
    govt_ovld_brier    = 1 - ((govt_ovld / 10 - govt_ovld_outcome)^2),
    govt_nva_brier     = 1 - ((govt_nva / 10 - govt_nva_outcome)^2),
    govt_vb_brier      = 1 - ((govt_vb / 10 - govt_vb_outcome)^2),
    govt_brier = govt_pvda_brier +
      govt_groen_brier +
      govt_vooruit_brier +
      govt_cdv_brier +
      govt_ovld_brier +
      govt_nva_brier +
      govt_vb_brier
  )

################################################################################
## 2.3.8. MANDATE
################################################################################

# Mandate (2023)
# Translate the Q2_2 codes into mandate labels; anything else (including missing)
# becomes "". Respondents labelled "None of the above" or "" are then dropped.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    mandate = dplyr::case_when(
      Q2_2 == 1 ~ "Mayor",
      Q2_2 == 2 ~ "Alderman",
      Q2_2 == 3 ~ "Councillor",
      Q2_2 == 4 ~ "None of the above",
      TRUE ~ ""
    )
  ) %>%
  filter(!(mandate %in% c("None of the above", "")))

# Mandate (2024)
# Same recoding and row filter, based on item Q2 of the 2024 survey.
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    mandate = dplyr::case_when(
      Q2 == 1 ~ "Mayor",
      Q2 == 2 ~ "Alderman",
      Q2 == 3 ~ "Councillor",
      Q2 == 4 ~ "None of the above",
      TRUE ~ ""
    )
  ) %>%
  filter(!(mandate %in% c("None of the above", "")))

################################################################################
## 2.3.9. MP OR LEADERSHIP POSITION AT HIGHER LEVEL
################################################################################

# Member of parliament, minister and/or party leader at the national or regional level (2023)
# Q2_4: 1 -> "Yes", 2 -> "No", anything else (including missing) -> "".
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    mp = dplyr::case_when(
      Q2_4 == 1 ~ "Yes",
      Q2_4 == 2 ~ "No",
      TRUE ~ ""
    )
  )

# Member of parliament, minister and/or party leader at the national or regional level (2024)
# Q5: 1 -> "Yes", 2 -> "No", anything else (including missing) -> "".
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    mp = dplyr::case_when(
      Q5 == 1 ~ "Yes",
      Q5 == 2 ~ "No",
      TRUE ~ ""
    )
  )

################################################################################
## 2.3.10. ALDERMAN OR MAYOR
################################################################################

# Ever been an alderman or mayor (2023)
# Q2_6: 1 -> "Yes", 2 -> "No", anything else (including missing) -> "".
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    alma = dplyr::case_when(
      Q2_6 == 1 ~ "Yes",
      Q2_6 == 2 ~ "No",
      TRUE ~ ""
    )
  )

# Ever been an alderman or mayor (2024)
# Q4: 1 -> "Yes", 2 -> "No", anything else (including missing) -> "".
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    alma = dplyr::case_when(
      Q4 == 1 ~ "Yes",
      Q4 == 2 ~ "No",
      TRUE ~ ""
    )
  )

################################################################################
## 2.3.11. FIRST YEAR OATH
################################################################################

# Year of first oath as a local representative (2023)
# Taken from the label column merged in from pf_flanders_2023.dta.
# NOTE(review): as.numeric() on a factor returns level codes rather than the printed
# years — confirm Q2_8_label is stored as character or numeric.
pf.flanders.2023.data$oath <- as.numeric(pf.flanders.2023.data$Q2_8_label)

# Year of first oath as a local representative (2024)
# NOTE(review): same caveat as above for Q6 if read.dta() delivered it as a factor.
pf.flanders.2024.data$oath <- as.numeric(pf.flanders.2024.data$Q6)

# Repair obvious data-entry errors in the reported year.
# The previous version tested Q4 (the "ever alderman or mayor" item) instead of the
# oath year, and had condition and replacement swapped for the 2919 typo, so none of
# the corrections could ever apply. The tests now run on the oath year itself;
# %in% keeps missing years out of the index.
pf.flanders.2024.data$oath[pf.flanders.2024.data$oath %in% 18] <- 2018
pf.flanders.2024.data$oath[pf.flanders.2024.data$oath %in% 2919] <- 2019
pf.flanders.2024.data$oath[pf.flanders.2024.data$oath %in% 19198] <- 1998

################################################################################
## 2.3.12. YEARS SINCE FIRST OATH
################################################################################

# Number of years since first taking the oath (2023)
# Difference between the survey year (2023) and the oath year; NA when oath is NA.
pf.flanders.2023.data$yoath <- with(pf.flanders.2023.data, 2023 - oath)

# Number of years since first taking the oath (2024)
# Difference between the survey year (2024) and the oath year; NA when oath is NA.
pf.flanders.2024.data$yoath <- with(pf.flanders.2024.data, 2024 - oath)

################################################################################
## 2.3.13. LOCAL GOVERNMENT OR OPPOSITION
################################################################################

# Part of the local government or the local opposition (2023)
# Q3_1: 1 -> "Local government", 2 -> "Local opposition", 3 -> "Other";
# anything else (including missing) -> "".
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    gov = dplyr::case_when(
      Q3_1 == 1 ~ "Local government",
      Q3_1 == 2 ~ "Local opposition",
      Q3_1 == 3 ~ "Other",
      TRUE ~ ""
    )
  )

# Part of the local government or the local opposition (2024)
# Q7: 3 -> "Local government", 4 -> "Local opposition";
# anything else (including missing) -> "".
# NOTE(review): the 2024 codes (3/4) differ from the 2023 codes (1/2/3) and there is
# no "Other" category here — confirm against the 2024 codebook.
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    gov = dplyr::case_when(
      Q7 == 3 ~ "Local government",
      Q7 == 4 ~ "Local opposition",
      TRUE ~ ""
    )
  )

################################################################################
## 2.3.14. SEX
################################################################################

# Sex (2023)
# Start from Q4_1, blank out codes 3 and -99, recode 2 to 0, then label the result
# (0 = "Female", 1 = "Male"); any other remaining value becomes NA in the factor.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    sex = replace(Q4_1, Q4_1 == 3, NA),
    sex = replace(sex, Q4_1 == -99, NA),
    sex = replace(sex, Q4_1 == 2, 0),
    sex = factor(sex, levels = c(0, 1), labels = c("Female", "Male"))
  )

# Sex: frequency table (2023)
# Reference: Appendix - Table A1
# Counts and percentages of the raw Q4_1 codes among respondents with groep == 1.
pf.flanders.2023.data1 <- filter(pf.flanders.2023.data, groep == 1)
freq_table <- table(pf.flanders.2023.data1[["Q4_1"]])
percentages <- 100 * prop.table(freq_table)
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

# Sex (2024)
# Start from Q1, blank out codes 3 and -99, recode 2 to 0, then label the result
# (0 = "Female", 1 = "Male"); any other remaining value becomes NA in the factor.
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    sex = replace(Q1, Q1 == 3, NA),
    sex = replace(sex, Q1 == -99, NA),
    sex = replace(sex, Q1 == 2, 0),
    sex = factor(sex, levels = c(0, 1), labels = c("Female", "Male"))
  )

# Sex: frequency table (2024)
# Reference: Appendix - Table A1
# Counts and percentages of the raw Q1 codes over all remaining 2024 respondents.
freq_table <- table(pf.flanders.2024.data[["Q1"]])
percentages <- 100 * prop.table(freq_table)
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

################################################################################
## 2.3.15. AGE
################################################################################

# Age and age group (2023)
# yob is the numeric year of birth from Q4_2_label, age is 2023 minus yob, and
# agegroup bins age into "18-34", "35-54" and "55+". Respondents with a missing age
# or an age below 18 keep the empty string "".
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    yob = as.numeric(Q4_2_label),
    age = 2023 - yob,
    agegroup = dplyr::case_when(
      age >= 18 & age <= 34 ~ "18-34",
      age >= 35 & age <= 54 ~ "35-54",
      age >= 55 ~ "55+",
      TRUE ~ ""
    )
  )

# Age group: frequency table (2023)
# Reference: Appendix - Table A1
# Counts and percentages of agegroup among respondents with groep == 1.
pf.flanders.2023.data1 <- filter(pf.flanders.2023.data, groep == 1)
freq_table <- table(pf.flanders.2023.data1[["agegroup"]])
percentages <- 100 * prop.table(freq_table)
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

# Age and age group (2024)
# yob is the numeric version of the column named "_v2", age is 2024 minus yob, and
# agegroup bins age into "18-34", "35-54" and "55+". Respondents with a missing age
# or an age below 18 keep the empty string "".
pf.flanders.2024.data$yob <- as.numeric(pf.flanders.2024.data$`_v2`)
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    age = 2024 - yob,
    agegroup = dplyr::case_when(
      age >= 18 & age <= 34 ~ "18-34",
      age >= 35 & age <= 54 ~ "35-54",
      age >= 55 ~ "55+",
      TRUE ~ ""
    )
  )

# Age group: frequency table (2024)
# Reference: Appendix - Table A1
# Counts and percentages of agegroup over all remaining 2024 respondents.
freq_table <- table(pf.flanders.2024.data[["agegroup"]])
percentages <- 100 * prop.table(freq_table)
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

################################################################################
## 2.3.16. PARTY AFFILIATION
################################################################################

# Party affiliation: Many party IDs? (2023)
# Row-wise flag: TRUE when more than one of the nine party items Q3_2_1 ... Q3_2_9
# equals 1, FALSE otherwise.
# NOTE(review): sum(x == 1) is called without na.rm = TRUE, so a single NA among the
# nine items makes the flag NA, which in turn sets affiliation to NA further down —
# confirm this is intended.
# NOTE(review): apply() converts the nine columns to one matrix first; this assumes
# they all share a numeric type — verify (a character column would change how
# x == 1 is evaluated).
pf.flanders.2023.data$manyids <- apply(pf.flanders.2023.data[, c("Q3_2_1", "Q3_2_2", "Q3_2_3", "Q3_2_4", 
                                                                 "Q3_2_5", "Q3_2_6", "Q3_2_7", "Q3_2_8",
                                                                 "Q3_2_9")], 1, function(x) sum(x == 1) > 1)

# Party affiliation: Convert party ID columns to character (2023)
# Needed because the recode step below matches on the string keys "1" and "-99".
# NOTE(review): Q3_2_9 is not converted here although it is recoded with the same
# string keys below — confirm whether leaving it out is deliberate.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  mutate_at(vars("Q3_2_1", "Q3_2_2", "Q3_2_3", "Q3_2_4", "Q3_2_5", "Q3_2_6", "Q3_2_7", "Q3_2_8"),
            as.character)

# Party affiliation: Remove -99 values and recode (2023)
# "1" becomes the party name for that item and "-99" becomes the empty string.
# Q3_2_8 and Q3_2_9 both map to "Other/Independent".
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(Q3_2_1 = recode(Q3_2_1, "1" = "CD&V", "-99" = ""),
         Q3_2_2 = recode(Q3_2_2, "1" = "Groen", "-99" = ""),
         Q3_2_3 = recode(Q3_2_3, "1" = "N-VA", "-99" = ""),
         Q3_2_4 = recode(Q3_2_4, "1" = "Open Vld", "-99" = ""),
         Q3_2_5 = recode(Q3_2_5, "1" = "PVDA", "-99" = ""),
         Q3_2_6 = recode(Q3_2_6, "1" = "Vooruit", "-99" = ""),
         Q3_2_7 = recode(Q3_2_7, "1" = "Vlaams Belang", "-99" = ""),
         Q3_2_8 = recode(Q3_2_8, "1" = "Other/Independent", "-99" = ""),
         Q3_2_9 = recode(Q3_2_9, "1" = "Other/Independent", "-99" = "")
  )

# Party affiliation: Concatenate values from party IDs columns in a single column (2023)
# affiliation2 holds every non-empty, non-missing label of a respondent joined by ", ".
# NOTE(review): only Q3_2_1 ... Q3_2_8 are concatenated; Q3_2_9 counts towards
# manyids above but never contributes a label here — verify.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(affiliation2 = apply(pf.flanders.2023.data[, c("Q3_2_1", "Q3_2_2", "Q3_2_3", "Q3_2_4", "Q3_2_5", "Q3_2_6", "Q3_2_7", "Q3_2_8")], 1, function(x) {
    # Remove NA and empty strings, then concatenate non-empty values
    paste(na.omit(x[x != ""]), collapse = ", ")
  }))


# Party affiliation: Create party affiliation column (2023)
# Starts as a copy of the concatenated string; affiliation2 itself is kept unchanged.
pf.flanders.2023.data$affiliation <- pf.flanders.2023.data$affiliation2

# Party affiliation: Replace multiple party ids by NAs (2023)
# Rows flagged in manyids (or with an NA flag) get NA instead of a party.
pf.flanders.2023.data <- pf.flanders.2023.data %>% 
  mutate(affiliation = ifelse(manyids == TRUE, NA, affiliation))

# Party affiliation: Add value labels to answer categories (2023)
# Converts to a factor with a fixed level order; any string that is not exactly one
# of the eight levels (e.g. "" or a comma-joined combination) becomes NA.
pf.flanders.2023.data$affiliation <- factor(pf.flanders.2023.data$affiliation,
                                            levels = c("PVDA", "Groen", "Vooruit", "CD&V", 
                                                       "Open Vld", "N-VA", "Vlaams Belang",
                                                       "Other/Independent"), 
                                            labels = c("PVDA", "Groen", "Vooruit", "CD&V", 
                                                       "Open Vld", "N-VA", "Vlaams Belang",
                                                       "Other/Independent"))

# Party affiliation: one 0/1 indicator per party (2023)
# Each indicator is 1 when the party name occurs anywhere in affiliation2, so
# a respondent who ticked several parties gets a 1 on each of them.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    affiliation_cdv     = as.numeric(str_detect(affiliation2, "CD&V")),
    affiliation_groen   = as.numeric(str_detect(affiliation2, "Groen")),
    affiliation_nva     = as.numeric(str_detect(affiliation2, "N-VA")),
    affiliation_ovld    = as.numeric(str_detect(affiliation2, "Open Vld")),
    affiliation_pvda    = as.numeric(str_detect(affiliation2, "PVDA")),
    affiliation_vooruit = as.numeric(str_detect(affiliation2, "Vooruit")),
    affiliation_vb      = as.numeric(str_detect(affiliation2, "Vlaams Belang"))
  )

# Party affiliation: Rename affiliation columns to party / party2 (2023)
pf.flanders.2023.data <- dplyr::rename(
  pf.flanders.2023.data,
  party = affiliation,
  party2 = affiliation2
)

# Party affiliation: frequency table (2023)
# Reference: Appendix - Table A1
# Counts and percentages are computed on the rows with groep == 1 only.
pf.flanders.2023.data1 <- dplyr::filter(pf.flanders.2023.data, groep == 1)
freq_table <- table(pf.flanders.2023.data1$party)
percentages <- freq_table / sum(freq_table) * 100
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

# Party affiliation (2024)
# Q26 codes are translated to party names; codes 7 and 9 both map to
# "Other/Independent". Rows with any other (or missing) Q26 value receive an
# empty string, which turns into NA once the factor levels are applied.
party_levels_2024 <- c("PVDA", "Groen", "Vooruit", "CD&V",
                       "Open Vld", "N-VA", "Vlaams Belang",
                       "Other/Independent")
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    affiliation = factor(
      dplyr::case_when(
        Q26 == 2 ~ "CD&V",
        Q26 == 3 ~ "Groen",
        Q26 == 5 ~ "N-VA",
        Q26 == 4 ~ "Open Vld",
        Q26 == 8 ~ "PVDA",
        Q26 == 1 ~ "Vooruit",
        Q26 == 6 ~ "Vlaams Belang",
        Q26 == 7 ~ "Other/Independent",
        Q26 == 9 ~ "Other/Independent",
        TRUE ~ ""
      ),
      levels = party_levels_2024
    )
  )

# Party affiliation: one 0/1 indicator per party (2024)
# 1 when Q26 equals the party's code, 0 for any other code, NA when Q26 is
# missing.
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    affiliation_cdv     = as.numeric(Q26 == 2),
    affiliation_groen   = as.numeric(Q26 == 3),
    affiliation_nva     = as.numeric(Q26 == 5),
    affiliation_ovld    = as.numeric(Q26 == 4),
    affiliation_pvda    = as.numeric(Q26 == 8),
    affiliation_vooruit = as.numeric(Q26 == 1),
    affiliation_vb      = as.numeric(Q26 == 6)
  )

# Rename affiliation column to party (2024)
pf.flanders.2024.data <- dplyr::rename(pf.flanders.2024.data,
                                       party = affiliation)

# Party affiliation: frequency table (2024)
# Reference: Appendix - Table A1
# Counts and percentages over all 2024 rows with a non-missing party.
freq_table <- table(pf.flanders.2024.data$party)
percentages <- freq_table / sum(freq_table) * 100
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

################################################################################
## 2.3.17. SEAT WINNERS
################################################################################

# Seat winners (2023)
# 1 for PVDA, Vooruit and Vlaams Belang, 0 for every other party, NA when
# party is missing.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    seats_winner = as.numeric(
      str_detect(party, "PVDA") |
        str_detect(party, "Vooruit") |
        str_detect(party, "Vlaams Belang")
    )
  )

# Seat winners (2024)
# Same coding driven by the Q26 party code: 8 (PVDA), 1 (Vooruit) and
# 6 (Vlaams Belang) are 1; codes 2, 3, 4, 5, 7 and 9 are 0; anything else
# (including a missing Q26) stays NA.
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    seats_winner = dplyr::case_when(
      Q26 == 8 | Q26 == 1 | Q26 == 6 ~ 1,
      Q26 == 2 | Q26 == 3 | Q26 == 5 | Q26 == 4 | Q26 == 7 | Q26 == 9 ~ 0,
      TRUE ~ NA_real_
    )
  )

################################################################################
## 2.3.18. COALITION WINNERS
################################################################################

# Coalition winners (2023)
# 1 for N-VA, CD&V and Vooruit, 0 for every other party, NA when party is
# missing.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    govt_winner = as.numeric(
      str_detect(party, "N-VA") |
        str_detect(party, "CD&V") |
        str_detect(party, "Vooruit")
    )
  )

# Coalition winners (2024)
# Same coding driven by the Q26 party code: 2 (CD&V), 5 (N-VA) and
# 1 (Vooruit) are 1; codes 3, 4, 6, 7, 8 and 9 are 0; anything else
# (including a missing Q26) stays NA.
pf.flanders.2024.data <- pf.flanders.2024.data %>%
  dplyr::mutate(
    govt_winner = dplyr::case_when(
      Q26 == 2 | Q26 == 5 | Q26 == 1 ~ 1,
      Q26 == 3 | Q26 == 4 | Q26 == 8 | Q26 == 6 | Q26 == 7 | Q26 == 9 ~ 0,
      TRUE ~ NA_real_
    )
  )

################################################################################
## 2.3.19. LEFT-RIGHT IDEOLOGY
################################################################################

# Personal left-right ideology (2023)
# -99 is set to missing first, then the scale is shifted down by one.
pf.flanders.2023.data$ideology <- with(
  pf.flanders.2023.data,
  replace(Q4_4, Q4_4 == -99, NA) - 1
)

# Left-right ideology of party closest to (2024)
# The scale is shifted down by one, then rows whose raw "_v1" answer was -99
# are set to missing.
v1_raw_2024 <- pf.flanders.2024.data[["_v1"]]
pf.flanders.2024.data$ideology <- replace(v1_raw_2024 - 1,
                                          v1_raw_2024 == -99,
                                          NA)

################################################################################
## 2.3.20. EDUCATION
################################################################################

# Education (2023 only)
# All three variables are derived from Q4_3 (codes 1-4, -99 = missing):
#   education  : the four original categories as a labelled factor
#   university : "University" for code 4, "No university" for codes 1-3
#   education3 : Low (codes 1-2), Moderate (code 3), High (code 4)
# Any other Q4_3 value, including -99 and NA, ends up as NA in all three.
pf.flanders.2023.data <- pf.flanders.2023.data %>%
  dplyr::mutate(
    education = factor(
      replace(Q4_3, Q4_3 == -99, NA),
      levels = 1:4,
      labels = c("Primary education/None",
                 "Secondary education",
                 "Higher non-university education",
                 "Higher university education")
    ),
    university = factor(
      dplyr::case_when(
        Q4_3 == 4 ~ 1,
        Q4_3 == 1 | Q4_3 == 2 | Q4_3 == 3 ~ 0,
        TRUE ~ NA_real_
      ),
      levels = c(0, 1),
      labels = c("No university",
                 "University")
    ),
    education3 = factor(
      dplyr::case_when(
        Q4_3 == 1 | Q4_3 == 2 ~ 1,
        Q4_3 == 3 ~ 2,
        Q4_3 == 4 ~ 3,
        TRUE ~ NA_real_
      ),
      levels = c(1, 2, 3),
      labels = c("Low",
                 "Moderate",
                 "High")
    )
  )

# Education: frequency table (2023 only)
# Reference: Appendix - Table A1
# Counts and percentages are computed on the rows with groep == 1 only.
pf.flanders.2023.data1 <- dplyr::filter(pf.flanders.2023.data, groep == 1)
freq_table <- table(pf.flanders.2023.data1$education)
percentages <- freq_table / sum(freq_table) * 100
result <- cbind(Frequency = freq_table, Percentage = percentages)
print(result)

################################################################################
## 2.3.21. SURVEY YEAR
################################################################################

# Survey year (2023): constant column identifying the wave
pf.flanders.2023.data <- dplyr::mutate(pf.flanders.2023.data, survey = 2023)

# Survey year (2024): constant column identifying the wave
pf.flanders.2024.data <- dplyr::mutate(pf.flanders.2024.data, survey = 2024)

################################################################################
## 2.3.22. SURVEY TYPE
################################################################################

# Survey type (2023): both waves carry the same type code, 1
pf.flanders.2023.data <- dplyr::mutate(pf.flanders.2023.data, type = 1)

# Survey type (2024): both waves carry the same type code, 1
pf.flanders.2024.data <- dplyr::mutate(pf.flanders.2024.data, type = 1)

################################################################################
## 2.4. DEFINE VARIABLE TYPE
################################################################################

# Define variable type (2023)
# party becomes character, the columns in factor_cols_2023 become factors and
# the columns in numeric_cols_2023 become numeric.
# seats_accuracy is converted with as.numeric() only: sending it through
# as.factor() first would make as.numeric() return the factor level codes
# (1, 2, 3, ...) instead of the stored accuracy values.
# NOTE(review): assumes seats_accuracy is numeric-like at this point -- confirm.
# education is a factor whose levels were declared in the order 1-4, so
# as.numeric() gives back the original 1-4 codes.
factor_cols_2023 <- c(
  "affiliation_pvda", "affiliation_groen", "affiliation_vooruit",
  "affiliation_ovld", "affiliation_cdv", "affiliation_nva", "affiliation_vb",
  "sex", "education3", "survey", "type",
  "seats_pvda", "seats_groen", "seats_vooruit", "seats_cdv",
  "seats_ovld", "seats_nva", "seats_vb"
)
numeric_cols_2023 <- c(
  "age", "education", "ideology",
  "govt_pvda", "govt_groen", "govt_vooruit", "govt_cdv",
  "govt_ovld", "govt_nva", "govt_vb",
  "seats_accuracy"
)
pf.flanders.2023.data$party <- as.character(pf.flanders.2023.data$party)
for (type_col in factor_cols_2023) {
  pf.flanders.2023.data[[type_col]] <-
    as.factor(pf.flanders.2023.data[[type_col]])
}
for (type_col in numeric_cols_2023) {
  pf.flanders.2023.data[[type_col]] <-
    as.numeric(pf.flanders.2023.data[[type_col]])
}

# Define variable type (2024)
# Same conversions as for 2023, minus the education variables, which are not
# converted for the 2024 data. seats_accuracy is again converted with
# as.numeric() only, for the reason given above.
factor_cols_2024 <- c(
  "affiliation_pvda", "affiliation_groen", "affiliation_vooruit",
  "affiliation_ovld", "affiliation_cdv", "affiliation_nva", "affiliation_vb",
  "sex", "survey", "type",
  "seats_pvda", "seats_groen", "seats_vooruit", "seats_cdv",
  "seats_ovld", "seats_nva", "seats_vb"
)
numeric_cols_2024 <- c(
  "age", "ideology",
  "govt_pvda", "govt_groen", "govt_vooruit", "govt_cdv",
  "govt_ovld", "govt_nva", "govt_vb",
  "seats_accuracy"
)
pf.flanders.2024.data$party <- as.character(pf.flanders.2024.data$party)
for (type_col in factor_cols_2024) {
  pf.flanders.2024.data[[type_col]] <-
    as.factor(pf.flanders.2024.data[[type_col]])
}
for (type_col in numeric_cols_2024) {
  pf.flanders.2024.data[[type_col]] <-
    as.numeric(pf.flanders.2024.data[[type_col]])
}

################################################################################
## 2.5. SELECT VARIABLES
################################################################################

# Select variables (2023 and 2024)
# Both waves keep the same columns in the same order; the 2023 data
# additionally keeps education, education3 and university right after
# ideology. The column lists are assembled from shared pieces so the two
# selections cannot drift apart.
keep_predictions <- c(
  "seats_pvda", "seats_groen", "seats_vooruit", "seats_cdv", "seats_ovld",
  "seats_nva", "seats_vb",
  "govt_pvda", "govt_groen", "govt_vooruit", "govt_cdv", "govt_ovld",
  "govt_nva", "govt_vb"
)
keep_affiliation <- c(
  "affiliation_pvda", "affiliation_groen", "affiliation_vooruit",
  "affiliation_ovld", "affiliation_cdv", "affiliation_nva", "affiliation_vb"
)
keep_education_2023 <- c("education", "education3", "university")
keep_background <- c(
  "sex", "age", "survey", "type",
  "mandate", "yob", "party", "oath", "yoath", "mp", "alma", "gov"
)
keep_outcomes <- c(
  "seats_winner", "govt_winner", "seats_accuracy", "govt_brier",
  "seats_pvda_correct", "seats_groen_correct", "seats_vooruit_correct",
  "seats_cdv_correct", "seats_ovld_correct", "seats_nva_correct",
  "seats_vb_correct",
  "govt_pvda_brier", "govt_groen_brier", "govt_vooruit_brier",
  "govt_cdv_brier", "govt_ovld_brier", "govt_nva_brier", "govt_vb_brier"
)

# Select variables (2023)
keep_cols_2023 <- c(keep_predictions, keep_affiliation, "ideology",
                    keep_education_2023, keep_background, keep_outcomes)
pf.flanders.2023.data <- pf.flanders.2023.data[, keep_cols_2023]

# Select variables (2024)
keep_cols_2024 <- c(keep_predictions, keep_affiliation, "ideology",
                    keep_background, keep_outcomes)
pf.flanders.2024.data <- pf.flanders.2024.data[, keep_cols_2024]

################################################################################
## 2.6. SAVE DATAFRAMES
################################################################################

# Save the 2023 dataframe: Stata format first, then R format
write_dta(data = pf.flanders.2023.data, path = "pf_flanders_2023_data.dta")
saveRDS(object = pf.flanders.2023.data, file = "pf_flanders_2023_data.rds")

# Save the 2024 dataframe: Stata format first, then R format
write_dta(data = pf.flanders.2024.data, path = "pf_flanders_2024_data.dta")
saveRDS(object = pf.flanders.2024.data, file = "pf_flanders_2024_data.rds")

################################################################################
## 2.7. MERGE 2023 AND 2024 DATAFRAMES
################################################################################

# Merge 2023 and 2024 dataframes
# Rows of the 2023 data come first, followed by the 2024 rows; columns that
# exist in only one wave are filled with NA for the other.
pf.flanders.data <- dplyr::bind_rows(
  list(pf.flanders.2023.data, pf.flanders.2024.data)
)

# Save the merged dataframe: Stata format first, then R format
write_dta(data = pf.flanders.data, path = "pf_flanders_data.dta")
saveRDS(object = pf.flanders.data, file = "pf_flanders_data.rds")

# =============================================================================
# END OF SCRIPT
# =============================================================================